kernel_path =  (File.expand_path(__FILE__).split("/"))[0..-2].join("/") + '/'
require kernel_path +'env'
require kernel_path +'pdb_my_lib.rb'
require kernel_path +'log_lib'
require kernel_path +'db_lib'
require kernel_path +'blast_lib'

$bl2seq = get_path('bl2seq') #'/home/snepomny/structures/progs/blast-2.2.22/bin/bl2seq'
$seq_files = get_path('seq_files') #'/home/snepomny/structures/data/new_seq_files'

#-------------------------------------------------------------------
#simplest parser of TMScore output
def parse_tmscore_lines lines
	results = []
	lines.split("\n").each do |line| 
		results << line.split(" ")[0]
	end
	return results.join(" ")
end
#-------------------------------------------------------------------
#will use blast data to syntesize two aligned xyz structures for TmScore and run it
def rmsd_one_blast_hit(l_name,r_name,blast_offset_q,blast_offset_s,blast_q,blast_s)
	path = get_path('rxyz_files')
	l_name = l_name.sub("_","")
	r_name = r_name.sub("_","")
	
#puts "boq: #{blast_offset_q} bos: #{blast_offset_s}"

	ind_arrays = index_blast_qs_aligned(blast_q,blast_s,blast_offset_q.to_i-1,blast_offset_s.to_i-1) #-1 because blast starts from 1
	#q_name = "#{path}pdb#{l_name.gsub("_",".ent.")}.3c"
	#s_name = "#{path}pdb#{r_name.gsub("_",".ent.")}.3c"
	
	xyz_q = load_rxyz("#{path}#{l_name}.rxyz")
	xyz_s = load_rxyz("#{path}#{r_name}.rxyz")
	#xyz_q = %x[cat #{q_name}].split("\n")
	#xyz_s = %x[cat #{s_name}].split("\n")
	
#puts "Size of xyz_q is #{xyz_q.size.to_s}"
#puts "Size of xyz_s is #{xyz_s.size.to_s}"
	#if !Dir[q_name].empty? and !Dir[s_name].empty?
		#begin
			save_temp_xyz(xyz_q,xyz_s,ind_arrays,l_name,r_name)
#puts  "#{get_path('TMscore')} /tmp/q.xyz /tmp/s.xyz  | grep -e RMSD -e '-score' |  tail -6 | head -5 | cut -d'=' -f2]"
			tm = parse_tmscore_lines(%x[#{get_path('TMscore')} /tmp/#{l_name}#{r_name}s.xyz /tmp/#{l_name}#{r_name}q.xyz  | grep -e RMSD -e '-score' |  tail -6 | head -5 | cut -d'=' -f2])
			#grep -v RMSD= | grep -v predict | cut -d'=' -f2])
			%x[mv /tmp/#{l_name}#{r_name}q.xyz /tmp/q.xyz]
			%x[mv /tmp/#{l_name}#{r_name}s.xyz /tmp/s.xyz]
			return tm
			#tm = parse_tmscore_lines(%x[#{get_path('TMscore')}  /tmp/q.xyz /tmp/s.xyz | grep -e RMSD -e '-score' | grep -v RMSD= | grep -v predict | cut -d'=' -f2])
			#log_me("#{l_name}|#{r_name}|#{tm}")
		#rescue Exception => e
		#	err_log e
		#end
	#end
end
#-------------------------------------------------------------------
#will syntesizze TMScore friendly ATOMS line fro raw xyz and a counter
def form_legal_tmscore_line(counter,xyz_arr)
		return 'xyz_arr is nil in form_legal_tmscore_line' if !xyz_arr
		x = sprintf("%8.3f",xyz_arr.split(" ")[0].to_f)
		y = sprintf("%8.3f",xyz_arr.split(" ")[1].to_f)
		z = sprintf("%8.3f",xyz_arr.split(" ")[2].to_f)
					
		spaces_0 = ' ' if counter < 1000
		spaces_0 = '  ' if counter < 100
		spaces_0 = '   ' if counter < 10

		return sprintf('ATOM      1  CA  ILE A%s%d    %s%s%s',spaces_0,counter,x,y,z) 
end
#-------------------------------------------------------------------
#will index xyz data of 2 chains with blast alignment
def save_temp_xyz(xyz_q,xyz_s,ind_arrays,l_name,r_name)
	raise "misaligned!" if ind_arrays[0].size != ind_arrays[1].size
#  	puts "#{xyz_q.size},#{xyz_s.size}"
# 	puts xyz_q
#	puts xyz_s

# 	puts ind_arrays[0].size
# 	print ind_arrays[0]
#  	puts ""
#  	print ind_arrays[1]
# 	puts ""
	
	fs = File.new("/tmp/#{l_name}#{r_name}s.xyz","w")
	fq = File.new("/tmp/#{l_name}#{r_name}q.xyz","w")
	index_for_fs = ind_arrays[1]
	index_for_fq = ind_arrays[0]
	
	counter = 0
	(0..index_for_fs.size-1).each do |i|
		 if xyz_s[index_for_fs[i]]!="-" and xyz_q[index_for_fq[i]]!="-"
			counter += 1
			fs.puts form_legal_tmscore_line(counter,xyz_s[index_for_fs[i]])
			fq.puts form_legal_tmscore_line(counter,xyz_q[index_for_fq[i]])
		end
	end
	fs.flush
	fs.close
	fq.flush
	fq.close
end
#-------------------------------------------------------------------
#will read pairs of chains. from file
#if blast information is not given for any pair will run bl2seq 
#will use blast data to syntesize two aligned xyz structures for TmScore and run it
def rmsd_from_file fname='',limit=1
	f = File.new(fname,"r")
	counter = 1
	while (line = f.gets)
	
	##### doing it with blast2seq and dumping the data from file
	arr =  line.split(" ")
	q_name = arr[0]
	s_name = arr[1]
	
	#write_fasta_to_file(q_name)
	#write_fasta_to_file(s_name) #  #{$seq_files}/#{q_name.sub('_','')}.seq -j #{$seq_files}/#{s_name.sub('_','')}.seq  
	cmd ="#{$bl2seq} -i #{$seq_files}/#{q_name.sub("_","")}.seq -j #{$seq_files}/#{s_name.sub("_","")}.seq -p blastp  -F F > blast.tmp "
	%x[#{cmd}]
	
	iterate_blast_queries('blast.tmp', 1){ |q| 
		iterate_hits_of_single_query(q) { |h| 
			 hsh = parse_hit(s_name,h)[0]	
			 t0 = rmsd_one_blast_hit q_name,s_name,hsh[:left_q],hsh[:left_s],hsh[:seq_q],hsh[:seq_s]
			 log_me (line + " " + t0)
# 			 t1 = tmscore_RachelCode q_name.sub('_',''), s_name.sub('_','')
# 			 if (t0-t1).abs > 0.005
# 				%x[grep #{q_name} #{ARGV[0]} > valid_0] unless ARGV[0] == 'valid_0'
# 				raise "mismatch"
# 			 end

	#		 %x[rm /tmp/#{q_name.sub('_','')}]
	#		 %x[rm /tmp/#{s_name.sub('_','')}]
	#		 %x[rm *.seq]
			 #insert_pair(q_name, parse_hit(p_name,h) )
		}
	} ##### end doing it with blast2seq and dumping the data from file
			#rmsd_one_blast_hit(*line.split(" "))
			break if limit < counter += 1
			
	end
end
#-------------------------------------------------------------------
#a bit deprecated will have to rewrite
def rmsd_from_db(limit,where='')
	blast_hits_iterate(limit,where) { |l_name,r_name,blast_offset_q,blast_offset_s,blast_q,blast_s|
		rmsd_one_blast_hit l_name,r_name,blast_offset_q,blast_offset_s,blast_q,blast_s
  	}
end
#-------------------------------------------------------------------
#for validation only
def tmscore_RachelCode prot1, prot2
	#anyway Rachel works only on home env.
	str_files = '/home/snepomny/structures/data/new_str_files' 
	
	blast_pdbs = '/home/snepomny/structures/pdb_utils/bin/blast_pdbs'
	tmscore = '/home/snepomny/structures/progs/TMscore_32'
	
	cmd = " #{$bl2seq} -i #{$seq_files}/#{prot1}.seq -j #{$seq_files}/#{prot2}.seq -p blastp -F F > /tmp/blast"
	#puts cmd
	%x[#{cmd}]
	
	cmd = "#{blast_pdbs} #{str_files}/#{prot1}.brk  #{str_files}/#{prot2}.brk -B blast > /tmp/_output_"
	%x[#{cmd}]
	
	sline1 = `grep -n MOLECULE /tmp/_output_ | head -n 1 | awk -F: '{print $1}'`
	sline2 = `grep -n MOLECULE /tmp/_output_ | tail -n 1 | awk -F: '{print $1}'`
	eline1 = `grep -n END /tmp/_output_ | head -n 1 | awk -F: '{print $1}'`
	eline2 = `grep -n END /tmp/_output_ | tail -n 1 | awk -F: '{print $1}'`
	sline1.chomp!
	eline1.chomp!
	sline2.chomp!
	eline2.chomp!
	# 
	len1 = eline1.to_i - sline1.to_i
	len2 = eline2.to_i - sline2.to_i
	
	%x[head -n #{eline1} /tmp/_output_ | tail -n #{len1} > /tmp/#{prot1}]
	%x[head -n #{eline2} /tmp/_output_ | tail -n #{len2} > /tmp/#{prot2}]
	tm = parse_tmscore_lines(%x[#{tmscore} /tmp/#{prot1} /tmp/#{prot2} | grep -e RMSD -e '-score' | grep -v RMSD= | grep -v predict | cut -d'=' -f2 ]) #| grep -e ^RMSD -e ^TM-score -e ^GDT-TS -e ^GDT-HA -e ^Max -e ^Structure
	log_me "#{prot1} |#{prot2} |" + tm
	return tm.split("|")[0].to_f
end


